library(tidyverse)
library(naniar)
library(visdat)
library(lubridate)
library(ggridges)
library(htmltools)
# install.packages("devtools")
#devtools::install_github("dill/emoGG")
library(emoGG)
library(leaflet) # leaflet map
Reading in and exploring the data.
longline <- readxl::read_xlsx("data/longline97-2008.xlsx")
Data Dictionary:
| Variable | Description |
|---|---|
| identifier | Unique Vessel Identifier |
| vessel_code | Vessel Code |
| vessel_code2 | Vessel Code (taking into account company?) |
| vessel_code | Name of Vessel |
| gross_tonnage | Gross tonnage of the vessel |
| vessel_length | Length of the Vessel (m) |
| vessel_width | Vessel Width |
| stanchion | Stanchions on the vessel |
| primary_engine_hp | Horsepower of the primary engine |
| second_engine_hp | Horsepower of the second engine |
| carrying_capacity | The carrying capacity of the vessel (units?) |
| year_constructed | The year the vessel was constructed |
| vessel_criteria | The criteria for the vessel |
| vessel_classification | The classification of the vessel |
Vessel Code
longline %>%
distinct(vessel_code)
## # A tibble: 10 × 1
## vessel_code
## <dbl>
## 1 400062
## 2 400065
## 3 400075
## 4 400082
## 5 400088
## 6 400118
## 7 400061
## 8 400081
## 9 400133
## 10 400512
Vessel Code2
longline %>%
distinct(vessel_code2)
## # A tibble: 10 × 1
## vessel_code2
## <dbl>
## 1 400062
## 2 400065
## 3 400075
## 4 400082
## 5 400088
## 6 400118
## 7 400061
## 8 400081
## 9 400133
## 10 NA
longline %>%
select(vessel_code,
vessel_code2,
vessel_code,
gross_tonnage,
vessel_length,
vessel_width,
primary_engine_hp,
second_engine_hp,
carrying_capacity) %>%
visdat::vis_miss()
longline %>%
group_by(vessel_code) %>%
distinct(vessel_code, vessel_length, vessel_width, gross_tonnage, carrying_capacity) %>%
ggplot(aes(x = vessel_width, y = vessel_length, label = vessel_code)) +
geom_point(aes(size = gross_tonnage)) +
geom_text(check_overlap = TRUE,
size = 3,
hjust = 0,
nudge_x = 0.1) +
lims(x = c(8, 10.5)) +
labs(x = "Vessel Width (m)",
y = "Vessel Length (m)",
size = "Gross Tonnage (tonnes)")
longline %>%
group_by(vessel_code) %>%
distinct(vessel_code, vessel_length, vessel_width, gross_tonnage, carrying_capacity) %>%
mutate(height = carrying_capacity,
width = carrying_capacity) %>%
ggplot(aes(x = vessel_width, y = vessel_length, label = vessel_code, size = I(carrying_capacity/6000))) +
geom_emoji(emoji = "26f4", show.legend = TRUE) +
geom_text(check_overlap = TRUE,
size = 3,
hjust = 0,
nudge_x = 0.1) +
lims(x = c(8, 10.5)) +
labs(x = "Vessel Width (m)",
y = "Vessel Length (m)",
size = "Carrying Capacity")
Observations:
One of the ships has a small carrying capacity for its size. One of the ships is much smaller than the others: 400082.
There is also one vessel for which we are missing a vessel code.
Vessel 400088 has only 2 entries.
longline %>%
select(vessel_code,
gross_tonnage,
vessel_length,
vessel_width,
vessel_classification,
year_constructed) %>%
visdat::vis_miss()
unique(longline$vessel_classification)
## [1] "NA" NA "casual"
Observations:
longline %>%
group_by(vessel_code) %>%
distinct(vessel_code, vessel_length, vessel_width, gross_tonnage, carrying_capacity, year_constructed)
## # A tibble: 10 × 6
## # Groups: vessel_code [10]
## vessel_code gross_tonnage vessel_length vessel_width carrying_capacity
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 400062 505. 44.5 8.4 350
## 2 400065 753 53.4 9.5 200
## 3 400075 537 46.8 8.5 571
## 4 400082 292 26.6 8 72
## 5 400088 653. 48.5 9.2 364.
## 6 400118 752 52.2 9.5 550
## 7 400061 465. 46.3 8.35 350
## 8 400081 292 26.6 8 72
## 9 400133 NA NA NA NA
## 10 400512 NA NA NA NA
## # … with 1 more variable: year_constructed <dbl>
longline %>%
group_by(vessel_code) %>%
distinct(vessel_code, vessel_length, vessel_width, gross_tonnage, carrying_capacity, year_constructed) %>%
ggplot(aes(x = year_constructed, y = gross_tonnage, label = vessel_code)) +
geom_point(aes(size = carrying_capacity)) +
geom_text(check_overlap = TRUE,
size = 3,
hjust = 0,
nudge_x = 1) +
labs(x = "Year Constructed",
y = "Gross Tonnage (tonnes)",
size = "Carrying Capacity") +
xlim(1965, 2000)
longline <- longline %>%
mutate(trip_return_date_clean = lubridate::as_datetime("1900-01-01") + days(round(trip_return_date)),
trip_leaving_date_clean = lubridate::as_datetime("1900-01-01") + days(round(trip_leaving_date)),
trip_duration = trip_return_date_clean - trip_leaving_date_clean,
vessel_code = as.factor(vessel_code)) # Not sure here what to use for the origin for the date.
longline %>%
distinct(vessel_code, trip_number, .keep_all = TRUE) %>%
ggplot() +
geom_point(aes(x = trip_leaving_date_clean, y = vessel_code)) +
geom_point(aes(x = trip_return_date_clean, y = vessel_code)) +
labs(y = "Vessel Code", x = "Date")
Observations
longline %>%
distinct(vessel_code, trip_number, .keep_all = TRUE) %>%
ggplot(aes(x = trip_duration, y = as.factor(vessel_code), fill = as.factor(vessel_code))) +
geom_density_ridges(alpha = 0.75) +
scale_fill_viridis_d() +
labs(y = "Vessel Code",
x = "Trip Duration (Days)",
fill = "Vessel Code")
What were the trip statistics (number of trips made and the minimum, mean, and maximum trip duration)?
longline %>%
group_by(vessel_code) %>%
select(vessel_code, trip_number, trip_duration, haul_number) %>%
distinct(vessel_code, trip_number, haul_number, .keep_all = TRUE) %>%
summarise(max_number_trips = max(trip_number),
max_trip_duration = max(trip_duration),
min_trip_duration = min(trip_duration),
mean_trip_duration = round(mean(trip_duration), 2),
max_hauls = max(haul_number))
## # A tibble: 10 × 6
## vessel_code max_number_trips max_trip_duration min_trip_duration
## <fct> <dbl> <drtn> <drtn>
## 1 400061 10 63 days 19 days
## 2 400062 11 60 days 8 days
## 3 400065 8 73 days 33 days
## 4 400075 8 88 days 19 days
## 5 400081 28 27 days 5 days
## 6 400082 29 161 days 5 days
## 7 400088 4 67 days 60 days
## 8 400118 8 91 days 24 days
## 9 400133 14 9 days 4 days
## 10 400512 8 6 days 5 days
## # … with 2 more variables: mean_trip_duration <drtn>, max_hauls <dbl>
Doublecheck your workflow for this one to see if it makes sense.
longline %>%
filter(total_capture_kg > 0 & total_capture_kg < 20000) %>%
ggplot(aes(x = total_capture_kg, y = vessel_code, fill = vessel_code)) +
geom_density_ridges(alpha = 0.75) +
scale_fill_viridis_d() +
labs(y = "Vessel Code",
x = "Total Catch (1000 kg)",
fill = "Vessel Code") +
facet_wrap(~Year) +
scale_x_continuous(breaks = c(0, 5000, 10000, 15000, 20000), labels = c("0", "5", "10", "15", "20"))
longline %>%
group_by(Year, vessel_code, Month) %>%
summarise(hauls = n()) %>%
ggplot(aes(x = Month,
y = hauls,
group = vessel_code,
color = vessel_code)) +
geom_line() +
scale_color_viridis_d() +
coord_polar() +
facet_wrap(~Year) +
labs(color = "Vessel Code")
The same plot above without the coord_polar
longline %>%
group_by(Year, vessel_code, Month) %>%
summarise(hauls = n()) %>%
ggplot(aes(x = Month,
y = hauls,
group = vessel_code,
color = vessel_code)) +
geom_line() +
scale_color_viridis_d() +
facet_wrap(~Year) +
labs(color = "Vessel Code")
longline <- longline %>%
mutate(startlondd = -startlondd,
startlatdd = -startlatdd,
endlondd = -endlondd,
endlatdd = -endlatdd,
group = as.factor(paste(vessel_code, trip_number, haul_number)))
pal <- colorNumeric(
palette = "Oranges",
domain = longline$Genypterus_blacodes)
longline$labels <- sprintf("<strong>Vessel Code: %s</strong><br/>%g proportion of catch",
longline$vessel_code, longline$Genypterus_blacodes) %>% lapply(htmltools::HTML)
(start_position_map <- leaflet(data = longline) %>%
addProviderTiles(providers$Esri.OceanBasemap) %>%
addCircleMarkers(data = longline[longline$vessel_code == 400062,],
lng = ~startlondd,
lat = ~startlatdd,
clusterOptions = NULL,
color = ~pal(Genypterus_blacodes),
label = longline$labels,
radius = 2,
group = "Vessel 400062") %>%
addCircleMarkers(data = longline[longline$vessel_code == 400065,],
lng = ~startlondd,
lat = ~startlatdd,
clusterOptions = NULL,
color = ~pal(Genypterus_blacodes),
label = longline$labels,
radius = 2,
group = "Vessel 400065") %>%
addLegend(position = "bottomright",
pal = pal,
values = ~Genypterus_blacodes,
title = "Proportion of catch that is pink cusk-eel",
opacity = 1)) %>%
addLayersControl(
overlayGroups = c("Vessel 400065", "Vessel 400062"),
options = layersControlOptions(collapsed = FALSE)
)
(end_position_map <- leaflet(data = longline) %>%
addProviderTiles(providers$Esri.OceanBasemap) %>%
addCircleMarkers(lng = ~endlondd,
lat = ~endlatdd,
clusterOptions = NULL,
color = ~pal(Genypterus_blacodes),
label = longline$labels,
radius = 2) %>%
addLegend(position = "bottomright",
pal = pal,
values = ~Genypterus_blacodes,
title = "Proportion of catch that is pink cusk-eel",
opacity = 1))
What are the highest proportions of species across the different hauls?
longline %>%
select(vessel_code, trip_number, leaving_port, landing_port, haul_number, total_catch, starts_with(match = "P_")) %>%
pivot_longer(cols = starts_with(match = "P_"), names_to = "prop_species", values_to = "proportion") %>%
filter(proportion > 0) %>%
group_by(vessel_code, trip_number) %>%
slice_max(order_by = proportion, n = 10) %>%
ungroup() %>%
select(prop_species) %>%
unique()
## # A tibble: 33 × 1
## prop_species
## <chr>
## 1 P_reineta
## 2 P_varios
## 3 P_cojinoba.2
## 4 P_jurel
## 5 P_tollo3
## 6 P_merluza1
## 7 P_dorado_
## 8 P_brotula
## 9 P_marrajo
## 10 P_bacalao.1
## # … with 23 more rows
longline %>%
select(vessel_code, trip_number, leaving_port, landing_port, haul_number, total_catch, Merluccius_australis, Genypterus_blacodes, starts_with(match = "P_")) %>%
pivot_longer(cols = c(Merluccius_australis, starts_with(match = "P_"), Genypterus_blacodes), names_to = "prop_species", values_to = "proportion") %>%
filter(proportion > 0) %>%
group_by(prop_species) %>%
summarise(n = n()) %>%
arrange(desc(n))
## # A tibble: 77 × 2
## prop_species n
## <chr> <int>
## 1 Genypterus_blacodes 11329
## 2 Merluccius_australis 11075
## 3 P_bacalao.1 4787
## 4 P_cojinoba.2 2634
## 5 P_varios 2150
## 6 P_reineta 1533
## 7 P_brotula 1067
## 8 P_cab2 410
## 9 P_merluza1 359
## 10 P_coji 265
## # … with 67 more rows
How does the weight of congrio dorado and merluza_comun/meluza_del_sur vary in hauls?
longline %>%
filter(total_capture_kg > 0 & total_capture_kg < 20000, vessel_name != "ISLA SOFIA") %>%
select(vessel_code, trip_number, leaving_port, landing_port, haul_number, total_catch, merluza_comun, congrio_dorado, merluza_del_sur, bacalao_de_profundidad, Year) %>%
pivot_longer(cols = c(merluza_comun, congrio_dorado, merluza_del_sur, bacalao_de_profundidad), names_to = "species", values_to = "weight_kg") %>%
ggplot(aes(x = weight_kg, y = vessel_code, fill = species)) +
geom_density_ridges(alpha = 0.75) +
scale_fill_viridis_d() +
labs(y = "Vessel Code",
x = "Weight (kg)",
fill = "Species") #+
#scale_x_continuous(breaks = c(0, 5000, 10000, 15000, 20000), labels = c("0", "5", "10", "15", "20"))
How complete is the fishing information?
visdat::vis_dat(longline[, c("J_time_line_set", "J_time_laying_line", "J_time_start_haul", "J_time_end_haul", "c_laying_time", "c_soak_time", "c_haul_time", "min_depth", "max_depth", "mid_depth", "max_min", "hook_type", "number_of_hooks", "bait", "hook_size")])
What is the range in the soak time?
longline %>%
filter(total_capture_kg > 0 & total_capture_kg < 20000, vessel_name != "ISLA SOFIA") %>%
ggplot(aes(x = c_soak_time, y = vessel_code)) +
geom_density_ridges(alpha = 0.75) +
scale_fill_viridis_d() +
labs(y = "Vessel Code",
x = "Soak Time (hours)")
What is the range in haul time?
longline %>%
filter(total_capture_kg > 0 & total_capture_kg < 20000, vessel_name != "ISLA SOFIA") %>%
ggplot(aes(x = c_haul_time, y = vessel_code)) +
geom_density_ridges(alpha = 0.75) +
scale_fill_viridis_d() +
labs(y = "Vessel Code",
x = "Haul Time (hours)")
What is the range in the min depth?
longline %>%
filter(total_capture_kg > 0 & total_capture_kg < 20000, vessel_name != "ISLA SOFIA") %>%
ggplot(aes(x = min_depth, y = vessel_code)) +
geom_density_ridges(alpha = 0.75) +
scale_fill_viridis_d() +
labs(y = "Vessel Code",
x = "Min Depth (m)")
What is the range in the max depth?
longline %>%
filter(total_capture_kg > 0 & total_capture_kg < 20000, vessel_name != "ISLA SOFIA") %>%
ggplot(aes(x = max_depth, y = vessel_code)) +
geom_density_ridges(alpha = 0.75) +
scale_fill_viridis_d() +
labs(y = "Vessel Code",
x = "Max Depth (m)") +
facet_wrap(~man_zone)
What is the range in the mid depth?
longline %>%
filter(total_capture_kg > 0 & total_capture_kg < 20000, vessel_name != "ISLA SOFIA") %>%
ggplot(aes(x = mid_depth, y = vessel_code)) +
geom_density_ridges(alpha = 0.75) +
scale_fill_viridis_d() +
labs(y = "Vessel Code",
x = "Mid Depth (m)") +
facet_wrap(~man_zone)
Looking at the hauls and filtering out only the groups that have the most:
longline %>%
select(vessel_code, vessel_name, trip_number, leaving_port, landing_port, haul_number, total_catch, Merluccius_australis, Genypterus_blacodes, starts_with(match = "P_"), man_zone) %>%
pivot_longer(cols = c(Merluccius_australis, starts_with(match = "P_"), Genypterus_blacodes), names_to = "prop_species", values_to = "proportion") %>%
filter(proportion > 0, prop_species %in% c("Genypterus_blacodes", "Merluccius_australis", "P_bacalao.1", "P_cojinoba.2", "P_reineta", "P_brotula", "P_cab2", "P_merluza1", "P_coji"), vessel_name != "ISLA SOFIA") %>%
ggplot(aes(x = proportion, y = vessel_code, fill = prop_species)) +
geom_density_ridges(alpha = 0.75) +
scale_fill_viridis_d() +
labs(y = "Vessel Code",
x = "Proportion of Catch") +
facet_wrap(~man_zone)